##############################################
# The Archipelago Capitalism of Citizenship-by-Investment
# Jelena Dzankic, Mira Seyfettinoglu, Ayelet Shachar, Maarten Vink, Luuk van der Baaren
# Comparative Political Studies, accepted for publication, October 2025
#
# Code for descriptive trend plot
# Code prepared by Maarten Vink with research assistance by Tarek Jaziri Arjona
##############################################

#start with clean workspace
rm(list=ls(all=TRUE))

#load packages
library(scales)
library(dplyr)
library(tidyr)
library(ggplot2)
library(gridExtra)
library(gridtext)
library(grid)
library(ggrepel)
library(janitor)
library(writexl)
library(readxl)
library(viridis)
library(Hmisc)

# read the dataset and add CBI generic and specific variables and add labelled version of cbi_grouped
dat <- read.csv("01_data/cbi_data_full.csv") |>
  mutate(cbi_cat1 = ifelse(cbi_cat == 0, "None",
                               ifelse(cbi_cat == 1, "Generic provision",
                                      ifelse(cbi_cat == 2, "Investment provision without specified sum",
                                      ifelse(cbi_cat == 3, "CBI", NA)))))
dat$cbi_cat1 <- factor(dat$cbi_cat1,levels = c("Generic provision", "Investment provision without specified sum", "None", "CBI"))
summary(dat$cbi_cat1)


# Dataset description

#View(dat) #11,002 obs of 53 variables
head(dat)
summary(dat)

#General descriptive statistics of the dataset
# Number of countries
n_iso3c <- length(unique(dat$iso3c))
n_iso3c
## [1] 196

# check number of countries in first and last year
dat |> filter(year == 1960) |> count(iso3c) # 111 countries
dat |> filter(year == 2023) |> count(iso3c) # 195 countries

#freq CBI categorical in 1960, 1990 and 2023
dat |> 
  filter(year == 1960 | year == 2023) |>
  group_by(year, cbi_cat1) %>%
  summarise (n = n()) %>%
  mutate(freq = 100*n/sum(n))
# year cbi_cat1                                       n   freq
# 1  1960 Generic provision                             12 10.8  
# 2  1960 Investment provision without specified sum     1  0.901
# 3  1960 None                                          98 88.3  
# 4  2023 Generic provision                             24 12.3  
# 5  2023 Investment provision without specified sum     4  2.05 
# 6  2023 None                                         148 75.9  
# 7  2023 CBI                                           19  9.74 


## Figure 1. CBI trend plot: changes specific v generic

# Fig1a: plot changes CBI specific
changes_cbi <- dat |>
  filter(cbi_change != "no change") |>
  ggplot(aes(year, fill = cbi_change))+
                  geom_dotplot(binwidth=1, stackgroups = TRUE, binpositions = "all")+
                  scale_y_continuous("  \n",
                                     limits=c(0,6), breaks = seq(0,8,2))+
                  scale_x_continuous("",
                                     limits=c(1960,2025), breaks = seq(1960, 2025, 20))+
                  theme_minimal()+
                  ggtitle("Citizenship by investment")+
                  scale_fill_manual(name = "", labels = c("introduction", "removal"),
                                      values = c("#5ab4ac", "#c1cdcd"))+
                  theme(text = element_text(size = 20))+
                  theme(legend.position="top")+
                  coord_fixed(ratio=1)
changes_cbi

# descriptive table for SM3
#select only years with changes in CBI 
datchange_cbi <- dat |> 
  filter(cbi_change != 'no change') |>
  select(year, iso3c, country_name, cbi_change, cbi_cat1, world_region) |>
  arrange(year)
datchange_cbi
# 49 entries with changes in CBI 

#save table
write.csv(datchange_cbi, "02_plots_tables/TableSM3_changes.CBI.csv", 
          row.names = F)
latex(datchange_cbi, longtable=TRUE, rowlabel = FALSE, file="02_plots_tables/TableSM3_changes.CBI.tex")      

#plot changes CBI generic
changes_gen <- dat |> 
  filter(cbi_gen_change != 'no change') |>
  ggplot(aes(year, fill = cbi_gen_change))+
  geom_dotplot(binwidth=1, stackgroups = TRUE, binpositions = "all")+
  scale_y_continuous("  \n",
                     limits=c(0,6), breaks = seq(0,8,2))+
  scale_x_continuous("",
                     limits=c(1960,2025), breaks = seq(1960, 2025, 20))+
  theme_minimal()+
  ggtitle("Generic provision or investment without specified sum") +
  scale_fill_manual(name = "", labels = c("introduction", "removal"),
                    values = c("#d8b350", "#c1cdcd"))+
  theme(text = element_text(size = 20))+
  theme(legend.position="top")+
  coord_fixed(ratio=1)
changes_gen

# descriptive table for SM4
#select only years with changes in CBI generic 
datchange_gen <- dat |> 
  filter(cbi_gen_change != 'no change') |>
  select(year, iso3c, country_name, cbi_gen_change, cbi_cat1, world_region) |>
  arrange(year)
datchange_gen
# 35 entries with changes in generic 2

#save table
write.csv(datchange_gen, "02_plots_tables/TableSM4_changes.CBI.gen.csv", 
          row.names = F)
latex(datchange_gen, longtable=TRUE, rowlabel = FALSE, file="02_plots_tables/TableSM4_changes.CBI.gen.tex")      


# save combi plot
jpeg('02_plots_tables/Fig1.CBI.generic.changes.combi.1960-2023.jpeg',  width = 14, height = 8, units = 'in', res = 600)
combi_plot <- grid.arrange(arrangeGrob(changes_cbi,changes_gen, ncol=1, heights=c(0.6,0.4)))
dev.off()


## Figure 2: CBI categorical trend by tax haven status

# check data
haven_cbi <- dat |> 
 # filter(haven_any==1) |>
  group_by(year, haven, cbi_cat1) |>
  summarise (n = n()) |>
  mutate(freq = round(n / sum(n), 2)) |>
  mutate(haven = as.factor(haven)) |>
  ungroup()

#plot
trend_haven <- dat |> 
  filter(haven==1) %>%
  group_by(year, cbi_cat1) %>%
  summarise (n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  ggplot(aes(year, freq, fill = cbi_cat1)) +
  geom_area(alpha=0.6 , linewidth=.5, colour="white")+
  scale_y_continuous(labels = scales::percent)+
  scale_x_continuous("",
                     limits=c(1960,2025), breaks = seq(1960, 2025, 20))+
  scale_fill_manual(name = "Percent of states with", labels = c("Generic provision", "Investment without specified sum", "No provision", "CBI"),
                    values = c("#d8b365", "pink", "#c1cdcd", "#5ab4ac"))+
  ylab("") +
  xlab("")+
  ggtitle("Tax havens")+
  theme_minimal()+
  theme(text = element_text(size = 20))+
  theme(legend.position="none") 
trend_haven

trend_no_haven <- dat |> 
  filter(haven==0) %>%
  group_by(year, cbi_cat1) %>%
  summarise (n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  ggplot(aes(year, freq, fill = cbi_cat1)) +
  geom_area(alpha=0.6 , linewidth=.5, colour="white")+
  scale_y_continuous(labels = scales::percent)+
  scale_x_continuous("",
                     limits=c(1960,2025), breaks = seq(1960, 2025, 20))+
  scale_fill_manual(name = "Percent of states with", 
                    labels = c("Generic provision", "Investment without specified sum", "No provision", "CBI"),
                    values = c("#d8b365", "pink", "#c1cdcd", "#5ab4ac"),
                    guide = guide_legend(reverse = TRUE))+
  ylab("") +
  xlab("")+
  ggtitle("Other")+
  theme_minimal()+
  theme(text = element_text(size = 20))+
  theme(legend.position="bottom") 
trend_no_haven

# save combi plot
jpeg('02_plots_tables/Fig2.CBI.trends.haven.jpeg',  width = 14, height = 12, units = 'in', res = 600)
grid.arrange(arrangeGrob(trend_haven, trend_no_haven, ncol=1, heights=c(0.55,0.45)))
dev.off()

### END
